import altair as alt
Let's use datasets from vega_datasets
from vega_datasets import data
# Load the cars dataset that ships with vega_datasets.
cars = data.cars()
# Preview the first five rows (five is head()'s default).
cars.head(n=5)
| | Name | Miles_per_Gallon | Cylinders | Displacement | Horsepower | Weight_in_lbs | Acceleration | Year | Origin |
|---|---|---|---|---|---|---|---|---|---|
| 0 | chevrolet chevelle malibu | 18.0 | 8 | 307.0 | 130.0 | 3504 | 12.0 | 1970-01-01 | USA |
| 1 | buick skylark 320 | 15.0 | 8 | 350.0 | 165.0 | 3693 | 11.5 | 1970-01-01 | USA |
| 2 | plymouth satellite | 18.0 | 8 | 318.0 | 150.0 | 3436 | 11.0 | 1970-01-01 | USA |
| 3 | amc rebel sst | 16.0 | 8 | 304.0 | 150.0 | 3433 | 12.0 | 1970-01-01 | USA |
| 4 | ford torino | 17.0 | 8 | 302.0 | 140.0 | 3449 | 10.5 | 1970-01-01 | USA |
Minimal scatterplot using cars dataset:
# Minimal scatterplot: one point mark per row, fuel economy on x and
# horsepower on y.
alt.Chart(cars).mark_point().encode(
    x='Miles_per_Gallon',
    y='Horsepower',
)
With circles marks:
# Same scatterplot drawn with circle marks; opacity=0.5 makes each circle
# half-transparent.
alt.Chart(cars).mark_circle(opacity=0.5).encode(
    x='Miles_per_Gallon',
    y='Horsepower',
)
Bar chart of car model counts per Origin:
# Bar chart with one bar per Origin; the count is computed by Altair through
# the 'count(Origin)' aggregate shorthand.
alt.Chart(cars).mark_bar().encode(
    x='Origin',
    y='count(Origin)',
)
Same with dataframe of counts
# Pre-aggregate in pandas: count the rows of each Origin and expose the
# result as a dataframe with columns 'Origin' and 'counts'.
origin_counts = (
    cars[['Origin']]
    .groupby('Origin')
    .size()
    .reset_index(name='counts')
)
origin_counts
# Plot the pre-computed counts directly; no aggregate is needed in the encoding.
alt.Chart(origin_counts).mark_bar().encode(
    x='Origin',
    y='counts',
)
With labels rotated and sized
# Count-per-Origin bar chart, 300 px wide, with the x-axis labels drawn
# horizontally (labelAngle=0).
alt.Chart(cars).mark_bar().encode(
    x=alt.X('Origin', axis=alt.Axis(labelAngle=0)),
    y='count(Origin)',
).properties(width=300)
# Boxplot of fuel economy, one box per Origin.
alt.Chart(cars).mark_boxplot().encode(x='Origin', y='Miles_per_Gallon')
# Histogram: bin Miles_per_Gallon on x and count the rows in each bin on y.
alt.Chart(cars).mark_bar().encode(
    x=alt.X('Miles_per_Gallon', bin=True),
    y='count()',
)
import numpy as np
import pandas as pd

# Build a small synthetic dataset: five evenly spaced x values in [0, 100]
# and their squares.
x = np.linspace(0, 100, 5)
y = x ** 2
df = pd.DataFrame(dict(x=x, y=y))

# Line chart of y against x.
alt.Chart(df).mark_line().encode(x=alt.X('x'), y=alt.Y('y'))
# Polished scatterplot: custom axis titles, scales that are not forced to
# include zero, colour by Origin with a titled legend, tooltips, a chart
# title, fixed size, and the interactivity added by .interactive().
alt.Chart(cars).mark_circle(opacity=0.5).encode(
    alt.X(
        'Miles_per_Gallon',
        axis=alt.Axis(title='Miles per gallon'),
        scale=alt.Scale(zero=False),
    ),
    alt.Y(
        'Horsepower',
        axis=alt.Axis(title='Horsepower'),
        scale=alt.Scale(zero=False),
    ),
    color=alt.Color('Origin', legend=alt.Legend(title='Origin')),
    tooltip=['Miles_per_Gallon', 'Horsepower'],
).properties(
    title='Cars Data', width=300, height=180,
).interactive()
Create a scatterplot of the iris dataset:
- x to sepalLength
- y to sepalWidth
- color to species

import altair as alt
from vega_datasets import data

# Load the iris dataset and display it.
df = data.iris()
df
# Sepal length vs. sepal width (both declared quantitative with ':Q'),
# coloured by species; neither scale is forced to include zero.
alt.Chart(df).mark_circle(opacity=0.5).encode(
    x=alt.X(
        'sepalLength:Q',
        title='Sepal length (cm)',
        scale=alt.Scale(zero=False),
    ),
    y=alt.Y(
        'sepalWidth:Q',
        title='Sepal width (cm)',
        scale=alt.Scale(zero=False),
    ),
    color='species',
).properties(title='Iris dataset')
Create a scatterplot of the iris dataset:
- x to sepalLength
- y to sepalWidth
- color to species
- size to petalLength
- color to species
- title to Species by color

import altair as alt
from vega_datasets import data

df = data.iris()
# Sepal length vs. sepal width; colour encodes species (legend titled
# "Species by color"), circle size encodes petal length, and the tooltip
# shows both sepal measurements.
# Optional tweaks left disabled: width=500 / height=280 in properties(),
# and a trailing .interactive().
alt.Chart(df).mark_circle(opacity=0.5).encode(
    alt.X(
        'sepalLength',
        axis=alt.Axis(title='Sepal length (cm)'),
        scale=alt.Scale(zero=False),
    ),
    alt.Y(
        'sepalWidth',
        axis=alt.Axis(title='Sepal width (cm)'),
        scale=alt.Scale(zero=False),
    ),
    color=alt.Color('species', legend=alt.Legend(title='Species by color')),
    size='petalLength',
    tooltip=['sepalLength', 'sepalWidth'],
).properties(title='Iris dataset')
Create a scatterplot of the SMO-VOR-2015.csv dataset:
- Load SMO-VOR-2015.csv in a dataframe
- x to ptime
- y to alt
- opacity to 0.1

import pandas as pd
import altair as alt

df = pd.read_csv('data/SMO-VOR-2015.csv')
# Convert the ptime column to pandas datetimes before plotting.
df['ptime'] = pd.to_datetime(df['ptime'])

# Lift Altair's row limit so the whole dataset can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Altitude against penetration time; opacity=0.1 makes each circle nearly
# transparent.
# Optional tweak left disabled: width=700 / height=380 in properties().
alt.Chart(df).mark_circle(opacity=0.1).encode(
    alt.X(
        'ptime',
        axis=alt.Axis(title='Penetration time'),
        scale=alt.Scale(zero=False),
    ),
    alt.Y(
        'alt',
        axis=alt.Axis(title='Altitude (feets)'),
        scale=alt.Scale(zero=False),
    ),
    tooltip=['ptime', 'flight', 'icao', 'ops'],
).properties(title='SMO VOR 2015 dataset').interactive()
Create a boxplot of the SMO-VOR-2015.csv dataset:
- Load SMO-VOR-2015.csv in a dataframe
- x to month
- y to alt
- color to month
- title to None
- title to 'Altitude (feets)'

import pandas as pd
import altair as alt

df = pd.read_csv('data/SMO-VOR-2015.csv')

# Make month an ordered categorical in calendar order.
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
df['month'] = pd.Categorical(df['month'], categories=months, ordered=True)

# Lift Altair's row limit so the whole dataset can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# One altitude boxplot per month, coloured by month; the x axis has an
# empty title and the chart is 600 px wide.
alt.Chart(df).mark_boxplot().encode(
    x=alt.X('month', title=''),
    y=alt.Y('alt', title='Altitude (feets)', scale=alt.Scale(zero=False)),
    color='month',
).properties(title='SMO VOR 2015 dataset', width=600)
Create a histogram of the SMO-VOR-2015.csv dataset:
- Load SMO-VOR-2015.csv in a dataframe
- x to alt

import pandas as pd
import altair as alt

df = pd.read_csv('data/SMO-VOR-2015.csv')
# Needed because of dataset size: lift Altair's row limit.
alt.data_transformers.disable_max_rows()

# Histogram of altitude: binned 'alt' on x, number of rows per bin on y.
alt.Chart(df).mark_bar().encode(
    x=alt.X('alt', title='Altitude (feets)', bin=True),
    y=alt.Y('count()', title='Counts'),
).properties(title='SMO VOR 2015 dataset')
Create histograms of the SMO-VOR-2015.csv dataset faceted by month:
- Load SMO-VOR-2015.csv in a dataframe
- x to alt
- facet=alt.Facet('month', sort=months) encoding

import pandas as pd
import altair as alt

# Calendar order for the facets. Defined in this cell so it runs on its own
# rather than depending on a `months` variable left over from another cell.
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

df = pd.read_csv('data/SMO-VOR-2015.csv')
# Needed because of dataset size: lift Altair's row limit.
alt.data_transformers.disable_max_rows()

# One altitude histogram per month: binned 'alt' on x, row count on y,
# faceted on the month column with the facets sorted by `months`.
alt.Chart(df).mark_bar().encode(
    alt.X('alt', title='Altitude (feets)', bin=True),
    alt.Y('count()', title='Counts'),
    facet=alt.Facet('month', sort=months),
).properties(title='SMO VOR 2015 dataset')